pacman::p_load(
  "data.table",
  "ggplot2",
  "stringr",
  "tidyr",
  "dplyr",
  "ggtext",
  "pheatmap",
  "purrr",
  "here",
  "ggnewscale"
)
source("../src/constants.R")
source("../src/themes.R")
source("../src/utility.R")
direct_label = "Direct"
indirect_label = "Indirect"
sample = "fecal"
motifs_direct <-  fread(paste0("../data/", sample, "/nanomotif/motifs.tsv"))[, directly := TRUE]
motifs_score <- fread(paste0("../data/", sample, "/nanomotif/motifs-scored.tsv"))
|--------------------------------------------------|
|==================================================|
motifs <- merge(
  motifs_score, motifs_direct, all.x = TRUE, by=c("contig", "motif", "mod_position", "mod_type"), suffixes = c("", ".drop"))[
  , directly := fifelse(is.na(directly), indirect_label, direct_label)
]

euk <- fread(paste0("../data/", sample, "/mmlong2_lite/tmp/eukfilt/tiara"))
bin <- fread(paste0("../data/", sample, "/mmlong2_lite/tmp/binning/contig_bin.tsv"), header = FALSE)
colnames(bin) <-  c("contig", "bin")
bin[, n_contigs := .N, by = bin]
tax <- fread(paste0("../data/", sample, "/gtdb-tk_mmlong2/gtdbtk.bac120.summary.tsv"))

motifs_full <- merge(motifs, euk, by.x = "contig", by.y = "sequence_id", all.x = TRUE)[class_fst_stage == "bacteria"] %>% 
  merge(bin, by = "contig", all.x = TRUE) %>% 
  merge(tax, by.x = "bin", by.y = "user_genome", all.x = TRUE)

motifs_full <- motifs_full[
    , mean := n_mod / (n_mod + n_nomod-1)
  ][
    , species := sapply(classification, extract_species_from_gtdbtk)
  ][
    , name := fcase(
      is.na(species), contig,
      !is.na(species), paste0(species, "_", contig)
    )
  ][
    , motif_markdown := paste0(
        str_sub(motif, 1, mod_position),
        "**",
        map(mod_type, function(x) MOD_TYPE_PRETTY[[x]]) %>%  unlist(),
        str_sub(motif, mod_position+1, mod_position+1),
        "**",
        str_sub(motif, mod_position+2, -1)
    )
  ][
    , simple_count := fcase(
      (n_mod + n_nomod) > 10, "",
      (n_mod + n_nomod) <= 10, as.character(n_mod + n_nomod - 1)
    )
  ]
motifs_full_binned <- motifs_full[!is.na(bin)]


bin_motifs <- fread(paste0("../data/", sample, "/nanomotif/bin-motifs.tsv")) %>% 
  merge(tax, by.x = "bin", by.y = "user_genome", all.x = TRUE)
bin_motifs[
    , mean := n_mod_bin / (n_mod_bin + n_nomod_bin-1)
  ][
    , species := sapply(classification, extract_species_from_gtdbtk)
  ][
    , name := fcase(
      is.na(species), bin,
      !is.na(species), paste0(species, "_", bin)
    )
  ][
    , motif_markdown := paste0(
        str_sub(motif, 1, mod_position),
        "**",
        map(mod_type, function(x) MOD_TYPE_PRETTY[[x]]) %>%  unlist(),
        str_sub(motif, mod_position+1, mod_position+1),
        "**",
        str_sub(motif, mod_position+2, -1)
    )
  ]
get_axis_order <- function(wide_df) {
  set.seed(1)
  feature_matrix = as.matrix(wide_df[,-1] )
  y_row_order <- pheatmap::pheatmap(as.matrix(feature_matrix), silent = TRUE)$tree_row$order
  y_factor_order <- wide_df$name[y_row_order]
  x_row_order <- pheatmap::pheatmap(t(as.matrix(feature_matrix)), silent = TRUE)$tree_row$order
  x_factor_order <- colnames(wide_df[,-1])[x_row_order]
  return(list(
    y = y_factor_order,
    x = x_factor_order
  ))
}
y_order <- unique(motif_full[order(bin, mean)]$name)
plot_data <- motif_full[!is.na(n_contigs)]
axis_orders <- get_axis_order(dcast(plot_data, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
bin_contigs_heatmap <- ggplot(plot_data) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels =  axis_orders[["y"]])) +
  geom_tile(aes(fill = directly), color = "#ffffff00", alpha = 0) +
  scale_fill_manual(values = c("Direct"=direct_color, "Indirect"=indirect_color)) +
  guides(fill = guide_legend(
    title = "Method of detection",
    override.aes = list(alpha = 1))
  ) +
  new_scale("fill") +
  
  geom_tile(aes(fill = mean),  color = "#ffffff00",linewidth = 0) +
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = direct_color, na.value = "white") +
  new_scale("fill") +
  
  geom_tile(data=plot_data[directly == indirect_label], aes(fill = mean),  color = "#ffffff00",linewidth = 0) + 
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = indirect_color, na.value = "white") +
  labs(fill = "Directly") +  
  
  new_scale("fill") +
  geom_text(data=plot_data[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=1) +
  geom_tile(aes(fill = mean), linewidth = 0, alpha = 0, color = "#33333300") +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  theme(
    axis.text.y = element_blank(),# element_markdown(vjust = 0.5, hjust = 0, size = 0.5),
    axis.ticks.y = element_blank(),
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 5),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
    panel.margin = unit(0, "lines"),
    panel.border = element_rect(colour = "#141414", fill=NA, linewidth = 0.1),
    strip.placement = "outside",
    strip.text.y.left = element_markdown(angle = 0, vjust = 0.5, hjust = 0),
    strip.background = element_blank()
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  ) +
  facet_wrap(~bin, scales = "free_y", ncol = 1, strip.position = "left")+
  geom_point(aes(color = "No motif observations"), shape = 16, size = 1, alpha = 0) +
  #geom_point(data = data.frame(x = NA, y = NA), aes(x = x, y = y, color = "Assigned MTase"), 
  #           shape = 8, size = 1, alpha=0) +
  scale_color_manual(name = "Indicator", values = c("No motif observations" = "black")) +
  guides(color = guide_legend(override.aes = list(alpha = 1, shape = c(16)))) # Adjust this line as needed
bin_contigs_heatmap


axis_orders <- get_axis_order(dcast(motifs_full_binned, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
hm_fecal <- ggplot(motifs_full_binned) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels = axis_orders[["y"]])) + 
  geom_tile(aes(fill = directly), linewidth = 0, alpha = 0) + 
  scale_fill_manual(values = c("Direct"="forestgreen", "Indirect"="#8C228C")) +
  guides(fill = guide_legend(
    title = "Method of detection",
    override.aes = list(alpha = 1))
  ) +
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0) +   
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "forestgreen", na.value = "white") +
  new_scale("fill") +
  geom_tile(data=motifs_full_binned[directly == indirect_label], aes(fill = mean), linewidth = 0) + 
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "#8C228C", na.value = "white") +
  labs(fill = "Directly") + 
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0, color = "gray20", alpha = 0) +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  #geom_text(data=fecal_binned[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=0.1) +
  #coord_fixed(ratio= 1, clip="off") +
  theme(
    axis.text.y = element_markdown(vjust = 0.5, hjust = 0, size = 1), # Rotate x-axis labels to 90 degrees
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 2),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  )
ggsave(
  "../figures/fecal_hm.png",
  hm_fecal
)
Saving 7 x 7 in image

axis_orders <- get_axis_order(dcast(bin_motifs, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
hm_fecal <- ggplot(bin_motifs) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels = axis_orders[["y"]])) + 
  geom_tile(aes(fill = mean), linewidth = 0) +   
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "forestgreen", na.value = "white") +
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0, color = "gray20", alpha = 0) +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  #geom_text(data=bin_motifsned[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=0.1) +
  #coord_fixed(ratio= 1, clip="off") +
  theme(
    axis.text.y = element_markdown(vjust = 0.5, hjust = 0, size = 1), # Rotate x-axis labels to 90 degrees
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 2),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  )
hm_fecal

---
title: "R Notebook"
output:
  html_document:
    df_print: paged
  html_notebook: default
  pdf_document: default
editor_options:
  chunk_output_type: inline
---


```{r}
pacman::p_load(
  "data.table",
  "ggplot2",
  "stringr",
  "tidyr",
  "dplyr",
  "ggtext",
  "pheatmap",
  "purrr",
  "here",
  "ggnewscale"
)
source("../src/constants.R")
source("../src/themes.R")
source("../src/utility.R")
direct_label = "Direct"
indirect_label = "Indirect"
sample = "anaerobic_digestor"
```


```{r}
motifs_direct <-  fread(paste0("../data/", sample, "/nanomotif/motifs.tsv"))[, directly := TRUE]
motifs_score <- fread(paste0("../data/", sample, "/nanomotif/motifs-scored.tsv"))
motifs <- merge(
  motifs_score, motifs_direct, all.x = TRUE, by=c("contig", "motif", "mod_position", "mod_type"), suffixes = c("", ".drop"))[
  , directly := fifelse(is.na(directly), indirect_label, direct_label)
]

euk <- fread(paste0("../data/", sample, "/mmlong2_lite/tmp/eukfilt/tiara"))
bin <- fread(paste0("../data/", sample, "/mmlong2_lite/tmp/binning/contig_bin.tsv"), header = FALSE)
colnames(bin) <-  c("contig", "bin")
bin[, n_contigs := .N, by = bin]
tax <- fread(paste0("../data/", sample, "/gtdb-tk_mmlong2/gtdbtk.bac120.summary.tsv"))

motifs_full <- merge(motifs, euk, by.x = "contig", by.y = "sequence_id", all.x = TRUE)[class_fst_stage == "bacteria"] %>% 
  merge(bin, by = "contig", all.x = TRUE) %>% 
  merge(tax, by.x = "bin", by.y = "user_genome", all.x = TRUE)

motifs_full <- motifs_full[
    , mean := n_mod / (n_mod + n_nomod-1)
  ][
    , species := sapply(classification, extract_species_from_gtdbtk)
  ][
    , name := fcase(
      is.na(species), contig,
      !is.na(species), paste0(species, "_", contig)
    )
  ][
    , motif_markdown := paste0(
        str_sub(motif, 1, mod_position),
        "**",
        map(mod_type, function(x) MOD_TYPE_PRETTY[[x]]) %>%  unlist(),
        str_sub(motif, mod_position+1, mod_position+1),
        "**",
        str_sub(motif, mod_position+2, -1)
    )
  ][
    , simple_count := fcase(
      (n_mod + n_nomod) > 10, "",
      (n_mod + n_nomod) <= 10, as.character(n_mod + n_nomod - 1)
    )
  ]
motifs_full_binned <- motifs_full[!is.na(bin)]


bin_motifs <- fread(paste0("../data/", sample, "/nanomotif/bin-motifs.tsv")) %>% 
  merge(tax, by.x = "bin", by.y = "user_genome", all.x = TRUE)
bin_motifs[
    , mean := n_mod_bin / (n_mod_bin + n_nomod_bin-1)
  ][
    , species := sapply(classification, extract_species_from_gtdbtk)
  ][
    , name := fcase(
      is.na(species), bin,
      !is.na(species), paste0(species, "_", bin)
    )
  ][
    , motif_markdown := paste0(
        str_sub(motif, 1, mod_position),
        "**",
        map(mod_type, function(x) MOD_TYPE_PRETTY[[x]]) %>%  unlist(),
        str_sub(motif, mod_position+1, mod_position+1),
        "**",
        str_sub(motif, mod_position+2, -1)
    )
  ]
```


```{r}
get_axis_order <- function(wide_df) {
  set.seed(1)
  feature_matrix = as.matrix(wide_df[,-1] )
  y_row_order <- pheatmap::pheatmap(as.matrix(feature_matrix), silent = TRUE)$tree_row$order
  y_factor_order <- wide_df$name[y_row_order]
  x_row_order <- pheatmap::pheatmap(t(as.matrix(feature_matrix)), silent = TRUE)$tree_row$order
  x_factor_order <- colnames(wide_df[,-1])[x_row_order]
  return(list(
    y = y_factor_order,
    x = x_factor_order
  ))
}

```

```{r}
y_order <- unique(motifs_full[order(bin, mean)]$name)
plot_data <- motifs_full[!is.na(n_contigs)]
axis_orders <- get_axis_order(dcast(plot_data, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
bin_contigs_heatmap <- ggplot(plot_data) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels =  axis_orders[["y"]])) +
  geom_tile(aes(fill = directly), color = "#ffffff00", alpha = 0) +
  scale_fill_manual(values = c("Direct"=direct_color, "Indirect"=indirect_color)) +
  guides(fill = guide_legend(
    title = "Method of detection",
    override.aes = list(alpha = 1))
  ) +
  new_scale("fill") +
  
  geom_tile(aes(fill = mean),  color = "#ffffff00",linewidth = 0) +
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = direct_color, na.value = "white") +
  new_scale("fill") +
  
  geom_tile(data=plot_data[directly == indirect_label], aes(fill = mean),  color = "#ffffff00",linewidth = 0) + 
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = indirect_color, na.value = "white") +
  labs(fill = "Directly") +  
  
  new_scale("fill") +
  geom_text(data=plot_data[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=1) +
  geom_tile(aes(fill = mean), linewidth = 0, alpha = 0, color = "#33333300") +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  theme(
    axis.text.y = element_blank(),# element_markdown(vjust = 0.5, hjust = 0, size = 0.5),
    axis.ticks.y = element_blank(),
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 5),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
    panel.margin = unit(0, "lines"),
    panel.border = element_rect(colour = "#141414", fill=NA, linewidth = 0.1),
    strip.placement = "outside",
    strip.text.y.left = element_markdown(angle = 0, vjust = 0.5, hjust = 0),
    strip.background = element_blank()
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  ) +
  facet_wrap(~bin, scales = "free_y", ncol = 1, strip.position = "left")+
  geom_point(aes(color = "No motif observations"), shape = 16, size = 1, alpha = 0) +
  #geom_point(data = data.frame(x = NA, y = NA), aes(x = x, y = y, color = "Assigned MTase"), 
  #           shape = 8, size = 1, alpha=0) +
  scale_color_manual(name = "Indicator", values = c("No motif observations" = "black")) +
  guides(color = guide_legend(override.aes = list(alpha = 1, shape = c(16)))) # Adjust this line as needed

height_hm <- 4.5
width_hm <- 8
plot_name <- "fecal_complex_hm"
ggsave(
  paste0("../figures/", plot_name, ".png"),
  bin_contigs_heatmap,
  height = height_hm,
  width = width_hm,
  dpi = "retina",
  bg = "white")
```



```{r}

axis_orders <- get_axis_order(dcast(motifs_full_binned, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
hm <- ggplot(motifs_full_binned) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels = axis_orders[["y"]])) + 
  geom_tile(aes(fill = directly), linewidth = 0, alpha = 0) + 
  scale_fill_manual(values = c("Direct"="forestgreen", "Indirect"="#8C228C")) +
  guides(fill = guide_legend(
    title = "Method of detection",
    override.aes = list(alpha = 1))
  ) +
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0) +   
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "forestgreen", na.value = "white") +
  new_scale("fill") +
  geom_tile(data=motifs_full_binned[directly == indirect_label], aes(fill = mean), linewidth = 0) + 
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "#8C228C", na.value = "white") +
  labs(fill = "Directly") + 
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0, color = "gray20", alpha = 0) +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  #geom_text(data=fecal_binned[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=0.1) +
  #coord_fixed(ratio= 1, clip="off") +
  theme(
    axis.text.y = element_markdown(vjust = 0.5, hjust = 0, size = 1), # Rotate x-axis labels to 90 degrees
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 2),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  )
```



```{r}
ggsave(
  "../figures/binned_contig_heatmap_", sample, ".png",
  hm,
  width = 25,
  height = 25
)
```
```{r}

axis_orders <- get_axis_order(dcast(bin_motifs, name ~ motif_markdown, value.var = "mean", fill = 0, fun.aggregate = mean))
hm_fecal <- ggplot(bin_motifs) +
  aes(x = factor(motif_markdown, levels = axis_orders[["x"]]), y = factor(name, levels = axis_orders[["y"]])) + 
  geom_tile(aes(fill = mean), linewidth = 0) +   
  scale_fill_gradient2(guide = "none", limits = c(0, 1), low = "white", high = "forestgreen", na.value = "white") +
  new_scale("fill") +
  geom_tile(aes(fill = mean), linewidth = 0, color = "gray20", alpha = 0) +
  scale_fill_gradient2(
    limits = c(0, 1), low = "white", high = "gray20",
    labels = scales::percent
  ) +
  #geom_text(data=bin_motifsned[(n_mod + n_nomod - 1)==0], label = ".", hjust=0.5, vjust=0, size=0.1) +
  #coord_fixed(ratio= 1, clip="off") +
  theme(
    axis.text.y = element_markdown(vjust = 0.5, hjust = 0, size = 1), # Rotate x-axis labels to 90 degrees
    axis.text.x = element_markdown(angle = 90, hjust = 1, vjust = 0.5, size = 2),
    panel.grid.minor = element_blank(),  # Remove minor gridlines
    panel.background = element_blank(),   # Remove panel background,
  ) +
  labs(
    x = "Motif",
    y = "",
    fill = "Methylation degree"
  )
hm_fecal
```

```{r}
bin_motifs[
  ,  c("domain", "phylum", "class", "order", "family", "genus", "species") := extraxt_gtdb_tax(classification)
]
```





```{r}
ggplot(bin_motifs) + 
  geom_bar(aes(x=mod_type, fill = motif_type)) +
  base_plot_theme() +
  scale_fill_brewer(palette = "Set1")
  
```

```{r}
ggplot(
  bin_motifs[, .(
    n_motifs = .N
  ), by = .(bin, mod_type)]
) + 
  geom_histogram(aes(x = n_motifs)) + 
  base_plot_theme() + 
  xlim(0, 8) +
  facet_wrap(~mod_type)
```


```{r}
ggplot(bin_motifs) +
  geom_histogram(aes(x=mean)) +
  xlim(0, 1) 
  
```


```{r}
bin_motifs
```
















